import pandas as pdfrom pandasql import sqldfimport plotly.graph_objects as gofrom plotly.subplots import make_subplots
data = pd.read_excel("SupplyChainDataRev1.xlsx")sqldf("select * from data limit 5")
SupplierID
ForeignMainland
Cycletime
Location
Class
DefectRate
Cost
0
7503961
U
49.0
D
Gold
4.0
5.0
1
3209120
F
70.0
D
Silver
5.0
8.0
2
1783896
M
NaN
C
Silver
5.0
1.0
3
9060334
None
49.0
D
Tin
7.0
7.0
4
3571171
F
66.0
F
Silver
9.0
12.0
q1 ="""select"Cycletime"fromdatawhere"Cycletime" is not null"""sqldf(q1).describe()
Cycletime
count
4664.000000
mean
53.719554
std
13.301960
min
18.000000
25%
44.000000
50%
54.000000
75%
64.000000
max
79.000000
q3 ="""with bin_data as (selectcase when "Cycletime" >= 15 and "Cycletime" <= 19 then '15-19' when "Cycletime" >= 20 and "Cycletime" <= 24 then '20-24' when "Cycletime" >= 25 and "Cycletime" <= 29 then '25-29' when "Cycletime" >= 30 and "Cycletime" <= 34 then '30-34' when "Cycletime" >= 35 and "Cycletime" <= 39 then '35-39' when "Cycletime" >= 40 and "Cycletime" <= 44 then '40-44' when "Cycletime" >= 45 and "Cycletime" <= 49 then '45-49' when "Cycletime" >= 50 and "Cycletime" <= 54 then '50-54' when "Cycletime" >= 55 and "Cycletime" <= 59 then '55-59' when "Cycletime" >= 60 and "Cycletime" <= 64 then '60-64' when "Cycletime" >= 65 and "Cycletime" <= 69 then '65-69' when "Cycletime" >= 70 and "Cycletime" <= 74 then '70-74' when "Cycletime" >= 75 and "Cycletime" <= 79 then '75-79' else '>80'end as "bins",count(*) as frequencyfromdatawhere "Cycletime" is not nullgroup by "bins"order by "bins")select*,cast(sum("frequency") over(order by "bins") as float) / cast(sum("frequency") over() as float) as "cumulative_percent"from bin_data"""bin_data = sqldf(q3)sqldf(q3)